

# data from https://www.ssa.gov/oact/babynames/limits.html
# stored in `ssa/`

# Build a first-name -> gender lookup from the SSA baby-name files, reading one
# file per decade from 1940 to 2000 (`ssa/yob1940.txt`, ..., `ssa/yob2000.txt`).
#
# Result: one row per lower-cased, trimmed first name with `gender` equal to
#   'm' if the male share is above .9 in every sampled year the name appears,
#   'f' if the male share is below .1 in every sampled year the name appears,
#   'u' otherwise.
names <- map(
  seq(1940, 2000, 10),
  ~ data.table::fread(
    glue('ssa/yob{.x}.txt'),
    sep = ',',
    col.names = c('first_name', 'gender', 'number')
  ) %>%
    mutate(year = .x)
) %>%
  bind_rows() %>%
  mutate(first_name = trimws(tolower(first_name))) %>%
  # One row per (first_name, year) with the counts spread into columns `F` and
  # `M`; a name with no records for one gender in a year gets a count of 0.
  pivot_wider(
    names_from = gender,
    values_from = number,
    values_fill = 0
  ) %>%
  # `%>%` binds tighter than `/`, so the rounding has to wrap the whole ratio:
  # `M / (F + M) %>% round(2)` rounds only the (integer) denominator.
  # Each row is already a single (first_name, year) pair, so a row-wise
  # mutate is enough here. `F` is the female-count column, not FALSE.
  mutate(prop_m = round(`M` / (`F` + `M`), 2)) %>%
  group_by(first_name) %>%
  summarise(
    gender = case_when(
      all(prop_m > .9) ~ 'm',
      all(prop_m < .1) ~ 'f',
      TRUE ~ 'u'
    )
  )

# Persist the lookup table to disk. save() writes R's workspace format, which
# is restored with load() under the object name `names`; despite the `.rds`
# extension this is not the single-object format that readRDS() expects.
save(list = 'names', file = 'gender/names_gender.rds')
